# ------------------------------------------------------------------------------
# Replication Materials
# 
# title: Eliciting Beliefs as Distributions in Online Surveys
# journal: Political Analysis
# authors: Lucas Leemann, Richard Traunmüller, and Lukas Stoetzer
# date: August 2020
# ------------------------------------------------------------------------------


  library(tidyverse)
  library(xtable)
  options(xtable.comment = FALSE)
  source("fun/fun_electbeleifs_MLE.R")
  source("fun/fun_utilities.R")

  

  source("00_DataCleaner_symmetric.R") # Richard Code to clean Data
  dat_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric.R") # Richard Code to clean Data
  dat_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_symmetric_variance.R") # Richard Code to clean Data
  dat_sym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_asymmetric_variance.R") # Richard Code to clean Data
  dat_asym_var <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_sym.R") # Richard Code to clean Data
  dat_nach_sym <- filter(data, screen1==1, screen2==1) # Filter with screen questions

  source("00_DataCleaner_nachtrag_asym.R") # Richard Code to clean Data
  dat_nach_asym <- filter(data, screen1==1, screen2==1) # Filter with screen questions


# Estimation  ============

  res <- list() # Result List

#+ Quantile, echo=FALSE, include=FALSE
## Quantile Question  ============

  # Estimate Model for Quantile Question Symmetric, Low Variance
  res[["sym_lvar_quant"]] <- dat_sym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(60,60))

  # Estimate Model for Quantile Question Asymmetric, Low Variance
  res[["asym_lvar_quant"]]  <- dat_asym %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, High Variance
  res[["sym_hvar_quant"]] <- dat_sym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(30,30))

  # Estimate Model for Quantile Question Asymmetric Variance, High Variance
  res[["asym_hvar_quant"]] <- dat_asym_var %>%
    creat_dat_Y(method="Quantile") %>%
    est_eB_beta(true.val = c(30,15))

#+ Quantile2, echo=FALSE, include=FALSE
## Quantile Question (without correction)  ============

  # # Estimate Model for Quantile Question Symmetric, Low Variance
  # res[["sym_lvar_qtnsc"]] <- dat_nach_sym %>%
  #    creat_dat_Y(method="QuantileNoCorrect") %>%
  #    est_eB_beta(true.val = c(60,60))
  #  
  # # Estimate Model for Quantile Question Asymmetric, Low Variance
  #  res[["asym_lvar_qtnsc"]]  <- dat_nach_asym %>%
  #    creat_dat_Y(method="Quantile") %>%
  #    est_eB_beta(true.val = c(60,30))

      
## Intervall Question (Wide) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intwi"]]  <- dat_sym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(60,60))

  # Estimate Model for Quantile Question Aaymmetric, low Variance
  res[["asym_lvar_intwi"]]  <- dat_asym %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intwi"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intwi"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval") %>%
    est_eB_beta_int(cut.offs = c(0.40,0.60), true.val = c(30,15))

#+ Intervall2, echo=FALSE, include=FALSE
## Intervall Question (Narrow) ============

  # Estimate Model for Quantile Question Symmetric, low variance
  res[["sym_lvar_intna"]]  <- dat_sym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric
  res[["asym_lvar_intna"]]  <- dat_asym %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["sym_hvar_intna"]]  <- dat_sym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high variance
  res[["asym_hvar_intna"]]  <- dat_asym_var %>%
    creat_dat_Y("Interval2") %>%
    est_eB_beta_int(cut.offs = c(0.45,0.55), true.val = c(30,15))

#+ Manski, echo=FALSE, include=TRUE
## Manski Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_manski"]]  <- dat_sym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(60,60))

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_lvar_manski"]]  <- dat_asym %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_hvar_manski"]]  <- dat_sym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(30,30))

  # Estimate Model for Quantile Question Asymmetric, low Variance
  res[["asym_hvar_manski"]]  <- dat_asym_var %>%
    creat_dat_Y("Manski") %>%
    est_eB_beta_manski(true.val = c(30,15))

#+ BinsBalls, echo=FALSE, include=FALSE
## Bins and Balls Question  ============

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["sym_lvar_binba"]]  <- dat_sym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(60,60))

  # Estimate Model for Quantile Question Symmetric, low Variance
  res[["asym_lvar_binba"]]  <- dat_asym %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(60,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["sym_hvar_binba"]]  <- dat_sym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(30,30))

  # Estimate Model for Quantile Question Symmetric, high Variance
  res[["asym_hvar_binba"]]  <- dat_asym_var %>%
    creat_dat_Y("BinsBalls") %>%
    est_eB_beta_binsballs(true.val = c(30,15))

#+ Figure, echo=FALSE, include=TRUE, fig.cap = "Comparsion of average elecited beliefs and true data distribution"
## Figure Average Estimate  ============

  # Get estimates
  par_est <- data.frame(t(sapply(sapply(res, "[[", "par"),
                                 function(res) res[names(res) %in% c("alpha","beta")])))

  # Name estimates  & create Factors
  par_est <- par_est %>%
    mutate(type=names(res)) %>%
    separate("type",c("type","var","method")) %>%
    mutate(type = factor(type, levels = c("sym","asym"), labels = c("Symmetric","Asymmetric")),
           var = factor(var, levels = c("lvar","hvar"),labels = c("Small Variance","Large Variance")),
           method = factor(method, labels = c("Bins and Balls","Interval (Narrow)",
                                              "Interval (Wide)", "Manski",
                                              "Quantile")),
           N = sapply(res, "[[", "n"))

  # Expand Data.frame
  expand.grid.df <- function(...) Reduce(function(...) merge(..., by=NULL), list(...))
  df_est <- expand.grid.df(data.frame("x"=seq(0.2,0.8,0.01)), par_est)

  # Prepare Data for Plot
  df_est       <- mutate(df_est, db = dbeta(x,alpha,beta))

  
  df_true <- data.frame(expand.grid("type"=c("Symmetric","Asymmetric"),
                        "var"=c("Large Variance","Small Variance")) %>%
             mutate(alpha_true = c(30,30,60,60),
                    beta_true = c(30,15,60,30)))

  # Left Join
  df_est <- left_join(df_est,df_true, by=c("type","var")) %>%
     mutate(db_true = dbeta(x,alpha_true,beta_true)) %>%
      select(-alpha_true, -beta_true) %>%
      gather("db","db_true",key=Scenario, value=val) %>%
      mutate(Scenario = factor(ifelse(Scenario=="db","Elicited Beliefs","Data Distribution")),
             Scenario = relevel(Scenario, "Elicited Beliefs"))

  # Plot Data
  ggplot(df_est) +
    geom_line(aes(x=x,y=val,linetype=Scenario)) +
    facet_grid(method~ type+var) +
    scale_color_grey() +
    theme_minimal() +
    xlab("Vote Share") + ylab("Density") +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                             colour = "grey96",
                             size = 0, linetype = "solid"),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank()
    )

  ggsave("out/fig/Fig_3.pdf",width = 12,height = 12)

#+ table, echo=FALSE, include=TRUE, results="asis"
## Estimates Table  ============

  # Prepare Table for symmetric Results
  est_tab <- left_join(par_est, df_true)  %>%
                 mutate(lik = sapply(res, "[[", "log_lik"),
                        lr = sapply(res, "[[", "lr")) %>%
                 rowwise() %>%
                 mutate(KL = KL_dis_beta(c(alpha_true,beta_true), as.numeric(c(alpha,beta)))) %>%
                 select(c("method","type","var","alpha","beta","KL","lik","lr","N"))

  ls_est <- split(est_tab, list(est_tab$type, est_tab$var))
  nam.l <- c("2_c","2_d","2_a","2_b")
  for(nam in names(ls_est)){
    scena <- which(nam == names(ls_est))
    dat_tab <- ls_est[[nam]] %>% arrange(KL) %>%
          select(c("method","alpha","beta","KL","lr","N"))
    types <- strsplit(nam,"[.]")[[1]]
    text <- paste("Estimates for", types[1],"Distribution with",types[2],sep=" ")
    print(xtable(dat_tab, caption=text), floating = FALSE,booktabs = T, include.rownames=FALSE,
          #file=paste("out/fig/Tab_ElecitedBeliefs_",str_replace(nam," ",""),"_MLE.tex",sep=""))
          file=paste("out/fig/Tab_",str_replace(nam.l[scena]," ",""),".tex",sep=""))
  }

  # Summary Table
  dat_tab_sum <- est_tab %>% 
                  group_by(method) %>%
                  summarise(KL_sum = sum(KL),
                            N = sum(N)) %>%
                  arrange(KL_sum)

  #print(xtable(dat_tab_sum, caption="Summary Kullback–Leibler divergence over four Applications"), floating = FALSE,booktabs = T, include.rownames=FALSE,
   #     file=paste("out/fig/Tab_ElecitedBeliefs_SUM_MLE.tex",sep=""))
  
    
  # Summary Dot-Plot
  
  ggplot(dat_tab_sum) +
    geom_point(aes(y=reorder(method,-KL_sum),x=KL_sum), size=5) +
    geom_vline(xintercept = 0, col="red", alpha=0.2) +
    theme_minimal() + 
    xlab("Kullback-Leibler divergence") +
    xlim(0,1.75) +
    theme(axis.title.y=element_blank(),
          text = element_text(size=20),
          strip.text.y = element_text(angle = 360),
          legend.position = "bottom",
         # axis.text.y=element_blank(), 
          axis.ticks.y=element_blank(),
          panel.background = element_rect(fill = "grey96",
                                          colour = "grey96",
                                          size = 0, linetype = "solid"),
          #panel.grid.major = element_blank(),
          #panel.grid.minor = element_blank()
    )
  
  ggsave("out/fig/Fig_4.pdf",width = 12,height = 4)
  